package jude.java.money.article;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import jude.java.money.article.ArticleSite.ContentHandler;

/**
 * Static catalogue of article sites that can be scraped.
 *
 * <p>Each constant is an {@link ArticleSite} configured through its fluent setters with the
 * URLs and regular expressions used to locate article links and extract article bodies.
 * Only the sites added in the static initializer at the bottom of this class end up in
 * {@link #targets}; the remaining constants have to be referenced directly.
 *
 * <p>NOTE(review): the exact meaning of the individual settings (interval units, page modes,
 * group indices, path-template placeholders) is defined by {@code ArticleSite} and
 * {@code Page}, which are not visible here — confirm against those classes.
 */
public class Targets {

	/**
	 * Marks the trailing author section of a biz-whiz.com article: a run of at least ten
	 * hyphens, followed by 1 to 20 non-hyphen characters, followed by the word "Author".
	 * Compiled once here instead of on every {@code handler} call; {@link Pattern} instances
	 * are immutable and safe to share.
	 */
	private static final Pattern BIZ_WHIZ_AUTHOR_FOOTER = Pattern.compile("[\\-]{10,}([^\\-]{1,20})Author");

	/** Sites registered by the static initializer of this class. */
	public static final List<ArticleSite> targets = new ArrayList<ArticleSite>();
	
	/**
	 * 1, ezinearticles.com — very prone to triggering CAPTCHA challenges.
	 *
	 * <p>The list page is the "Arts-and-Entertainment" category; its regex captures the
	 * relative article link (group 1), the numeric id (group 2) and the link text (group 3).
	 * The content page captures the inside of {@code <div id="body">}.
	 */
	public static final ArticleSite EZINEARTICLES = new ArticleSite("ezinearticles.com")
			.setPageMode(ArticleSite.PAGE_MODE_NORMAL)
			.setPageKey("page")
			.setInterval(3000)	// presumably milliseconds between requests — confirm in ArticleSite
			.setShieldString("There is an excessive amount of traffic coming from your Region.")
			.setHttp403(true)
			.setListPage(
					new Page()
					.setUrl("http://ezinearticles.com/?cat=Arts-and-Entertainment")
					.setContentRegex("<a class=\"article-title-link\" href=\"(/\\?.+&amp;id=(\\d+))\">(.+?)</a>")
					.setGroups(new int[] {1,2,3})
			)
			.setContentPage(
					new Page()
					.setContentRegex("<div id=\"body\">(.+?)</div>")
					.setGroups(new int[] {1})
					.setStartString("<div class=\"article_body\">")
			);
	
	/**
	 * 3, ideamarketers.com — articles are discovered through the site's Google custom search.
	 *
	 * <p>The query URL below is a captured search request that still carries sample values
	 * ({@code q=shoe}, {@code start=20}); presumably {@code ArticleSite} substitutes the
	 * parameters named by {@code setQueryKey("q")} and {@code setPageKey("start")} — TODO confirm.
	 * The result regex has four capture groups; groups 1, 3 and 4 are selected (full article
	 * URL, numeric article id, link text) and group 2 (the path fragment before
	 * {@code articleid=}) is skipped.
	 */
	public static final ArticleSite IDEAMARKETERS = new ArticleSite("ideamarketers.com")
			.setPageMode(ArticleSite.PAGE_MODE_RECORD)
			.setPageSize(10)
			.setPageKey("start")
			.setQueryKey("q")
			.setInterval(1000)
			.setShieldString("did not match any documents.  <br><br>Suggestions")
			.setQueryPage(
					new Page()
					.setUrl("http://www.google.com/custom?hl=en&safe=active&client=pub-0030293851970063&cof=FORID:10%3BAH:left%3BCX:Google%2520Search%2520Aug%25202010%3BL:http://www.google.com/intl/en/images/logos/custom_search_logo_sm.gif%3BLH:30%3BLP:1%3BLC:%23063e3f%3BVLC:%230d8f63%3B&rurl=http://www.ideamarketers.com/searchresults.cfm%3Fcx%3Dpartner-pub-0030293851970063%253A6y3wd3ydzts%26cof%3DFORID%253A10%26ie%3DISO-8859-1%26q%3Dbags%26sa%3DSearch%26siteurl%3Dwww.ideamarketers.com%252F%253FBusiness_Coaching%2526articleid%253D2249192%26siteurl%3Dwww.ideamarketers.com%252F%253FBusiness_Coaching%2526articleid%253D2249192%26google_rsg%3D__akpxz91ukB0djjCd6lDhXYrb-W0%3D&cx=partner-pub-0030293851970063:6y3wd3ydzts&ad=w9&adkw=AELymgUJjOYEuCQ2YbvIxtsKJs56dfIU9EDO_WDRizYDX2iVup5uEvqEx8kzp8F-5Fv7q6hCVlcV5Zsjl3k3GQKbuoSn5m8_DoEcSzDul8YKY5U5VAcyTbg&channel=2587737822&boostcse=0&oe=ISO-8859-1&ei=d-XNTf-dM4fmrAeZusXCCg&q=shoe&start=20&sa=N")
					// Earlier result-link pattern, kept for reference (attribute order differed):
//					.setRegex("<a class=\"l\" href=\"(http://www\\.ideamarketers\\.com/([^\"]+?)articleid=(\\d+))\" onmousedown=\"[^\"]+\" target=\"[^\"]+\">(.+?)</a>")
					.setContentRegex("<a href=\"(http://(?:www\\.)?ideamarketers\\.com/([^\"]+?)articleid=(\\d+))\" target=[\"']?[a-zA-Z_]+[\"']? class=[\"']?l[\"']? onmousedown=\"[^\"]+\">(.+?)</a>")
					.setGroups(new int[] {1, 3, 4})
			)
			.setContentPage(
					new Page()
					.setContentRegex("<P>\\s+<font face=\"Verdana,Arial,Helvetica\" size=\"2\">([\\w\\W]+?)</font>")
					.setGroups(new int[] {1})
					.setFollowRedirects(true)
			)
			.setPathTemplate(ArticleSite.DEFAULT_QUERY_PATH_TEMPLATE);
	
	/**
	 * ideamarketers.com — direct access variant that addresses articles by id through the
	 * {@code ${PAGE_ID}} placeholder instead of going through the search page.
	 *
	 * <p>Uses the same body regex as {@link #IDEAMARKETERS} and additionally extracts a title.
	 * A page containing the configured failure string is reported as a failure to
	 * {@code Page} (handling is presumably "skip this id" — confirm in ArticleSite).
	 * This constant is not added to {@link #targets}.
	 */
	public static final ArticleSite IDEAMARKETERS2 = new ArticleSite("ideamarketers.com")
			.setContentPage(
					new Page()
					.setUrl("http://www.ideamarketers.com/?articleid=${PAGE_ID}")
					// Earlier, stricter title pattern, kept for reference:
//					.setTitleRegex("<font size=5>\\s*(?:<a href=\"http://www\\.ideamarketers\\.com/awards\\.cfm\" target=\"resourcewindow\"><img src=\"http://www\\.ideamarketers\\.com/images/award-topcontributor\\.gif\" align=\"right\" border=\"0\"></a>\\s*)?<strong>(.+?)</strong>")
					.setTitleRegex("<font size=5>([\\w\\W]+?)</strong>")
					.setContentRegex("<P>\\s+<font face=\"Verdana,Arial,Helvetica\" size=\"2\">([\\w\\W]+?)</font>")
					.setGroups(new int[] {1})
					.setFollowRedirects(true)
					.setFailures(new String[] {
							"Sorry, the article that you requested is no longer on our system."
					})
			)
			.setInterval(1)
			.setPathTemplate("{BASE}\\{SITE}\\{PAGE_SCOPE}\\{TITLE}.txt");
	
	/**
	 * biz-whiz.com — articles are addressed by id through the {@code ${PAGE_ID}} placeholder.
	 *
	 * <p>The attached {@link ContentHandler} cuts the extracted text at the first match of
	 * {@link #BIZ_WHIZ_AUTHOR_FOOTER}, dropping the hyphen separator and everything after it.
	 * Text without such a match is returned unchanged.
	 * This constant is not added to {@link #targets}.
	 */
	public static final ArticleSite BIZ_WHIZ = new ArticleSite("biz-whiz.com")
			.setContentPage(
					new Page()
					.setUrl("http://biz-whiz.com/article${PAGE_ID}.html")
					.setTitleRegex("<center><h1>(.+)</h1></center>")
					.setContentRegex("<font class=\"pn-art\"><table ><tr><td>([\\w\\W]+?)</font></td></tr></table>")
					.setFollowRedirects(true)
//					.setFailures(new String[] {})
			)
			.setPathTemplate("{BASE}\\{SITE}\\{PAGE_SCOPE}\\{TITLE}.txt")	//{ID}-{TITLE}.txt
			.setInterval(1000)
			.setContentHandler(new ContentHandler() {
				/**
				 * Strips the trailing author section from an article body.
				 *
				 * @param content extracted article text
				 * @return {@code content} truncated just before the first author-footer
				 *         separator, or {@code content} itself when no separator is found
				 */
				@Override
				public String handler(String content) {
					Matcher footer = BIZ_WHIZ_AUTHOR_FOOTER.matcher(content);
					if (!footer.find()) {
						return content;
					}
					// Keep only the text that precedes the hyphen run.
					return content.substring(0, footer.start());
				}
			});
	
	// Only these two sites are registered; IDEAMARKETERS2 and BIZ_WHIZ are left out.
	static {
		targets.add(EZINEARTICLES);
		targets.add(IDEAMARKETERS);
	}
}
